#############################################################################################################################

# Usage: <script> <Tumor> <Normal> <config_file>
#   Tumor, Normal - identifiers used to build the "${Tumor}_${Normal}_MutQc*"
#                   file names used by the rest of this script.
#   config_file   - shell fragment that is sourced into this script.
if (( $# < 3 )); then
  echo "Usage: $0 <Tumor> <Normal> <config_file>" >&2
  exit 2
fi

Tumor=$1
Normal=$2
config_file=$3

source "${config_file}"

# The steps below build every path and the bedtools command line from these
# settings (presumably provided by the config file - TODO confirm). Abort now
# instead of silently reading/writing paths such as "/<Tumor>_<Normal>_MutQc.csv"
# when one of them is missing or empty.
: "${vcf_qc_path:?vcf_qc_path is not set (expected from ${config_file})}"
: "${tmp_path:?tmp_path is not set (expected from ${config_file})}"
: "${ref_path:?ref_path is not set (expected from ${config_file})}"
: "${bedtools:?bedtools is not set (expected from ${config_file})}"

#############################################################################################################################
## Annotate RepeatMasker regions
# Step 1: convert the MutQc CSV into a tab-separated, BED-like table that can
# be passed to bedtools as -a.
# Output columns: <piece 1> <piece 2> <piece 2> followed by every CSV column.
#  - The header is skipped by line number (line 1 is also what the output step
#    uses as header). Filtering it with `grep -v POS` would additionally drop
#    every data row that merely contains the substring "POS".
#  - awk runs with a tab separator on comma-separated input, so $1 is the whole
#    line as long as it contains no tab; split() on ":" therefore yields the
#    first two colon-separated pieces of the line.
#    NOTE(review): presumably the first CSV column is a key such as
#    chr:pos:... - confirm against the producer of MutQc.csv.
#  - NOTE(review): start and end are both set to the same position, i.e. a
#    zero-length BED interval; confirm this is the intended convention.
#  - Every comma is turned into a tab (same effect as piping through
#    `tr "," "\t"`).
awk -F'\t' '
NR > 1 {
  split($1, key, ":")
  row = key[1] "," key[2] "," key[2] "," $0
  gsub(/,/, "\t", row)
  print row
}' "${vcf_qc_path}/${Tumor}_${Normal}_MutQc.csv" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.tmp.tsv"

# Step 2: left-outer-join every mutation row against the RepeatMasker /
# simple-repeat BED. With -loj, rows of -a without any overlap are still
# reported, padded with a null -b record.
# ${bedtools} is deliberately left unquoted so that a config value holding a
# command plus arguments keeps working; all paths are quoted.
${bedtools} intersect -loj \
  -a "${tmp_path}/${Tumor}_${Normal}_MutQc.tmp.tsv" \
  -b "${ref_path}/RepeatsRegion/hg19/hg19_RepeatMaskerSimpleRepeat.bed" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.repeatmasker.tmp.tsv"

# Step 3: keep the 22 input columns and append one label column.
# Assumes the -a rows have exactly 22 columns, so the joined -b record starts
# at column 23 and its 4th column is column 26. A "." in column 23 is treated
# as "no overlap"; otherwise the 4th -b column is used as the label.
awk -F'\t' '
{
  label = ($23 != ".") ? $26 : "."
  for (i = 1; i <= 22; i++) printf "%s\t", $i
  print label
}' "${tmp_path}/${Tumor}_${Normal}_MutQc.repeatmasker.tmp.tsv" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.repeatmasker.tmp.1.tsv"

#############################################################################################################################
## Annotate TRF (simple / tandem repeat) regions
# Left-outer-join the RepeatMasker-labelled rows against the simple-repeats
# BED; rows without overlap are kept and padded with a null -b record.
# ${bedtools} is deliberately left unquoted so that a config value holding a
# command plus arguments keeps working; all paths are quoted.
${bedtools} intersect -loj \
  -a "${tmp_path}/${Tumor}_${Normal}_MutQc.repeatmasker.tmp.1.tsv" \
  -b "${ref_path}/RepeatsRegion/hg19/hg19_SimpleRepeats.bed" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.SimpleRepeats.tmp.tsv"

# Keep the 23 input columns and append the label "trf" when the joined -b
# record (starting at column 24, assuming 23 input columns) is not the null
# record, "." otherwise.
awk -F'\t' '
{
  label = ($24 != ".") ? "trf" : "."
  for (i = 1; i <= 23; i++) printf "%s\t", $i
  print label
}' "${tmp_path}/${Tumor}_${Normal}_MutQc.SimpleRepeats.tmp.tsv" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.SimpleRepeats.tmp.1.tsv"


#############################################################################################################################
## Annotate telomere gap regions
# Left-outer-join the rows labelled so far against the telomere BED; rows
# without overlap are kept and padded with a null -b record.
# ${bedtools} is deliberately left unquoted so that a config value holding a
# command plus arguments keeps working; all paths are quoted.
${bedtools} intersect -loj \
  -a "${tmp_path}/${Tumor}_${Normal}_MutQc.SimpleRepeats.tmp.1.tsv" \
  -b "${ref_path}/RepeatsRegion/hg19/hg19_telomere.bed" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.telomere.tmp.tsv"

# Keep the 24 input columns and append the label "telomere" when the joined
# -b record (starting at column 25, assuming 24 input columns) is not the null
# record, "." otherwise.
awk -F'\t' '
{
  label = ($25 != ".") ? "telomere" : "."
  for (i = 1; i <= 24; i++) printf "%s\t", $i
  print label
}' "${tmp_path}/${Tumor}_${Normal}_MutQc.telomere.tmp.tsv" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.telomere.tmp.1.tsv"


#############################################################################################################################
## Annotate regions within 1 Mb upstream/downstream of the centromeres
# Left-outer-join the rows labelled so far against the centromere (1 MB) BED;
# rows without overlap are kept and padded with a null -b record.
# ${bedtools} is deliberately left unquoted so that a config value holding a
# command plus arguments keeps working; all paths are quoted.
${bedtools} intersect -loj \
  -a "${tmp_path}/${Tumor}_${Normal}_MutQc.telomere.tmp.1.tsv" \
  -b "${ref_path}/RepeatsRegion/hg19/hg19_Centromeres.1MB.bed" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.Centromeres.tmp.tsv"

# Keep the 25 input columns and append the label "centromeres_1MB" when the
# joined -b record (starting at column 26, assuming 25 input columns) is not
# the null record, "." otherwise.
awk -F'\t' '
{
  label = ($26 != ".") ? "centromeres_1MB" : "."
  for (i = 1; i <= 25; i++) printf "%s\t", $i
  print label
}' "${tmp_path}/${Tumor}_${Normal}_MutQc.Centromeres.tmp.tsv" \
  > "${tmp_path}/${Tumor}_${Normal}_MutQc.Centromeres.tmp.1.tsv"

#############################################################################################################################
## Output
# Create (truncate) the annotated CSV and write its header: the first line of
# the input CSV with the four annotation column names appended. awk stops
# after line 1, so the rest of the input is not read.
awk 'NR == 1 { print $0 ",repeatmasker,tandem_repeat_finder,telomere,centromeres_1MB"; exit }' \
  "${vcf_qc_path}/${Tumor}_${Normal}_MutQc.csv" \
  > "${vcf_qc_path}/${Tumor}_${Normal}_MutQc.annotationRegion.csv"

## If a mutation falls at the junction of RepeatMasker-defined regions, the
## joins above emit it more than once, so duplicate rows must be removed.
# Columns 1-3 (the helper coordinate columns) are dropped; columns 4-26 (the
# CSV columns plus the four labels) are re-joined with commas and appended
# below the header that is already in the output file.
# `sort -u` removes byte-identical rows and also sorts the rows.
# NOTE(review): rows of one mutation that carry different repeatmasker labels
# are not identical and therefore both remain - confirm this is acceptable.
awk -F'\t' '
{
  for (i = 4; i < 26; i++) printf "%s,", $i
  print $26
}' "${tmp_path}/${Tumor}_${Normal}_MutQc.Centromeres.tmp.1.tsv" \
  | sort -u \
  >> "${vcf_qc_path}/${Tumor}_${Normal}_MutQc.annotationRegion.csv"